suppressMessages(
c(library(ggplot2),
library(mixOmics),
library(gplots),
library(RColorBrewer),
library(readr),
library(dplyr),
library(tidyverse),
library(knitr),
library(rafalib),
library(reshape),
library(clusterProfiler),
library(org.Mm.eg.db),
library(AnnotationDbi))
)
## [1] "ggplot2" "stats" "graphics" "grDevices"
## [5] "utils" "datasets" "methods" "base"
## [9] "mixOmics" "lattice" "MASS" "ggplot2"
## [13] "stats" "graphics" "grDevices" "utils"
## [17] "datasets" "methods" "base" "gplots"
## [21] "mixOmics" "lattice" "MASS" "ggplot2"
## [25] "stats" "graphics" "grDevices" "utils"
## [29] "datasets" "methods" "base" "RColorBrewer"
## [33] "gplots" "mixOmics" "lattice" "MASS"
## [37] "ggplot2" "stats" "graphics" "grDevices"
## [41] "utils" "datasets" "methods" "base"
## [45] "readr" "RColorBrewer" "gplots" "mixOmics"
## [49] "lattice" "MASS" "ggplot2" "stats"
## [53] "graphics" "grDevices" "utils" "datasets"
## [57] "methods" "base" "dplyr" "readr"
## [61] "RColorBrewer" "gplots" "mixOmics" "lattice"
## [65] "MASS" "ggplot2" "stats" "graphics"
## [69] "grDevices" "utils" "datasets" "methods"
## [73] "base" "forcats" "stringr" "purrr"
## [77] "tidyr" "tibble" "tidyverse" "dplyr"
## [81] "readr" "RColorBrewer" "gplots" "mixOmics"
## [85] "lattice" "MASS" "ggplot2" "stats"
## [89] "graphics" "grDevices" "utils" "datasets"
## [93] "methods" "base" "knitr" "forcats"
## [97] "stringr" "purrr" "tidyr" "tibble"
## [101] "tidyverse" "dplyr" "readr" "RColorBrewer"
## [105] "gplots" "mixOmics" "lattice" "MASS"
## [109] "ggplot2" "stats" "graphics" "grDevices"
## [113] "utils" "datasets" "methods" "base"
## [117] "rafalib" "knitr" "forcats" "stringr"
## [121] "purrr" "tidyr" "tibble" "tidyverse"
## [125] "dplyr" "readr" "RColorBrewer" "gplots"
## [129] "mixOmics" "lattice" "MASS" "ggplot2"
## [133] "stats" "graphics" "grDevices" "utils"
## [137] "datasets" "methods" "base" "reshape"
## [141] "rafalib" "knitr" "forcats" "stringr"
## [145] "purrr" "tidyr" "tibble" "tidyverse"
## [149] "dplyr" "readr" "RColorBrewer" "gplots"
## [153] "mixOmics" "lattice" "MASS" "ggplot2"
## [157] "stats" "graphics" "grDevices" "utils"
## [161] "datasets" "methods" "base" "clusterProfiler"
## [165] "reshape" "rafalib" "knitr" "forcats"
## [169] "stringr" "purrr" "tidyr" "tibble"
## [173] "tidyverse" "dplyr" "readr" "RColorBrewer"
## [177] "gplots" "mixOmics" "lattice" "MASS"
## [181] "ggplot2" "stats" "graphics" "grDevices"
## [185] "utils" "datasets" "methods" "base"
## [189] "org.Mm.eg.db" "AnnotationDbi" "IRanges" "S4Vectors"
## [193] "Biobase" "BiocGenerics" "parallel" "stats4"
## [197] "clusterProfiler" "reshape" "rafalib" "knitr"
## [201] "forcats" "stringr" "purrr" "tidyr"
## [205] "tibble" "tidyverse" "dplyr" "readr"
## [209] "RColorBrewer" "gplots" "mixOmics" "lattice"
## [213] "MASS" "ggplot2" "stats" "graphics"
## [217] "grDevices" "utils" "datasets" "methods"
## [221] "base" "org.Mm.eg.db" "AnnotationDbi" "IRanges"
## [225] "S4Vectors" "Biobase" "BiocGenerics" "parallel"
## [229] "stats4" "clusterProfiler" "reshape" "rafalib"
## [233] "knitr" "forcats" "stringr" "purrr"
## [237] "tidyr" "tibble" "tidyverse" "dplyr"
## [241] "readr" "RColorBrewer" "gplots" "mixOmics"
## [245] "lattice" "MASS" "ggplot2" "stats"
## [249] "graphics" "grDevices" "utils" "datasets"
## [253] "methods" "base"
We will select the top 5000 genes with the highest variances.
VST_TNF <- read_csv("VST_TNF.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## X1 = col_character()
## )
## See spec(...) for full column specifications.
dat <- as.data.frame(VST_TNF[, c(18:22, 8:12, 2:7)])
rownames(dat) <- VST_TNF$X1
# calculate row variance
row_var <- apply(dat, 1, var)
filter_dat <- dat[order(-row_var)[1:5000],]
# Plot the heatmap
suppressMessages(library(mosaic))
for (i in 1:dim(filter_dat)[1]) {
filter_dat[i,1:16] <- zscore(as.numeric(filter_dat[i,1:16]))
}
my_palette <- colorRampPalette(c("blue", "white", "red"))(128)
heatmap_matrix <- as.matrix(filter_dat[,1:16])
png('Top 5000 variance.png',
width = 1000,
height = 1200,
res = 200,
pointsize = 8)
heatmap.2(heatmap_matrix,
main = "Top 5000 variance",
density.info = "none",
key = TRUE,
lhei = c(1,7),
col=my_palette,
cexCol = 1,
margins = c(8,2),
trace = "none",
dendrogram = "row",
labRow = FALSE,
keysize = 2,
ylab = "Genes",
Colv = "NA")
dev.off()
## quartz_off_screen
## 2
pdf('Top 5000 variance.pdf',
width = 8,
height = 10)
heatmap.2(heatmap_matrix,
main = "Top 5000 variance",
density.info = "none",
key = TRUE,
lhei = c(1,7),
col=my_palette,
cexCol = 1,
margins = c(8,2),
trace = "none",
dendrogram = "row",
labRow = FALSE,
keysize = 2,
ylab = "Genes",
Colv = "NA")
dev.off()
## quartz_off_screen
## 2
include_graphics('Top 5000 variance.png')
row_clust <- hclust(dist(filter_dat, method = 'euclidean'), method = 'ward.D2')
plot(row_clust, labels = FALSE)
abline(h = 34, col = "red")
# seperate clusters
h = 34
clusters <- cutree(row_clust, h = h)
# number of distinct cluster we get
length(unique(clusters))
## [1] 11
# generate of list of genes for the 10 clusters
genecluster <- function(n) {
index <- which(clusters == n)
row_clust$labels[index]
}
cluster_gene <- lapply(1:length(unique(clusters)), genecluster)
names(cluster_gene) <- paste("cluster", c(1:length(unique(clusters))))
Now we need to make a function that would plot a boxplot for all the genes in each cluster.
cluster_boxplot <- function(a) {
# plot the boxplots
gene_list <- cluster_gene[[a]]
cluster_data <- filter_dat[gene_list,]
suppressMessages(df <- melt(cluster_data, variable_name = "Samples"))
df <- data.frame(df, Condition = substr(df$Samples,1,4))
ggplot(df, aes(x=Samples, y=value, fill = Condition)) + geom_boxplot() + xlab("") +
ylab("Z-scored gene expression") + scale_fill_manual(values = c("#619CFF", "#F564E3", "#E69F00")) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ggtitle(names(cluster_gene)[a])
}
lapply(1:11, cluster_boxplot)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
cluster_heatmap <- function(a) {
gene_list <- cluster_gene[[a]]
# plot the heatmap
heatmap_matrix <- as.matrix(filter_dat[gene_list,])
png(paste(names(cluster_gene)[a], "heatmap.png"),
width = 800,
height = 1000,
res = 150,
pointsize = 8)
heatmap.2(heatmap_matrix,
main = paste(names(cluster_gene)[a], "heatmap"),
density.info = "none",
key = TRUE,
lhei = c(1,7),
col=my_palette,
cexCol = 1,
margins = c(8,2),
trace = "none",
dendrogram = "row",
labRow = FALSE,
keysize = 2,
ylab = "Genes",
Colv = "NA")
dev.off()
}
lapply(1:11, cluster_heatmap)
## [[1]]
## quartz_off_screen
## 2
##
## [[2]]
## quartz_off_screen
## 2
##
## [[3]]
## quartz_off_screen
## 2
##
## [[4]]
## quartz_off_screen
## 2
##
## [[5]]
## quartz_off_screen
## 2
##
## [[6]]
## quartz_off_screen
## 2
##
## [[7]]
## quartz_off_screen
## 2
##
## [[8]]
## quartz_off_screen
## 2
##
## [[9]]
## quartz_off_screen
## 2
##
## [[10]]
## quartz_off_screen
## 2
##
## [[11]]
## quartz_off_screen
## 2
include_graphics("cluster 1 heatmap.png")
include_graphics("cluster 2 heatmap.png")
include_graphics("cluster 3 heatmap.png")
include_graphics("cluster 4 heatmap.png")
include_graphics("cluster 5 heatmap.png")
include_graphics("cluster 6 heatmap.png")
include_graphics("cluster 7 heatmap.png")
include_graphics("cluster 8 heatmap.png")
include_graphics("cluster 9 heatmap.png")
include_graphics("cluster 10 heatmap.png")
include_graphics("cluster 11 heatmap.png")
Based on the previous analysis, cluster 4 and 11 are selected as being inflammation related. Cluster 7 and 8 are selected as MK2 signature clusters.
# Convert Ensembl ID to Entrez ID
detected_gene <- rownames(filter_dat)
annotations_entrez <- AnnotationDbi::select(org.Mm.eg.db,
keys = detected_gene,
columns = c("ENTREZID"),
keytype = "ENSEMBL")
## 'select()' returned 1:many mapping between keys and columns
detected_gene_entrez <- as.character(unique(annotations_entrez$ENTREZID[!is.na(annotations_entrez$ENTREZID)]))
## for each cluster
lapply(c(4,11,7,8), function(i) {
target_gene <- cluster_gene[[i]]
annotations_entrez <- AnnotationDbi::select(org.Mm.eg.db,
keys = target_gene,
columns = c("ENTREZID"),
keytype = "ENSEMBL")
target_gene_entrez <- as.character(unique(annotations_entrez$ENTREZID[!is.na(annotations_entrez$ENTREZID)]))
kk <- enrichKEGG(gene = target_gene_entrez, universe = detected_gene_entrez, organism = 'mmu')
write.csv(as.data.frame(kk), paste("KEGG analysis/KEGG_cluster ", i, ".csv", sep = ""))
dotplot(kk, showCategory = 20) + labs(title = paste("KEGG for genes in", names(cluster_gene)[i]))
})
## 'select()' returned 1:many mapping between keys and columns
## Reading KEGG annotation online:
##
## Reading KEGG annotation online:
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## 'select()' returned 1:many mapping between keys and columns
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]